# =============================================================================
# April 24 2025
# R code used to produce all results, tables, and figures in the SI Online Appendix D
# Rebecca Cordell
# Unpacking the Role of In-Group Bias in US Public Opinion on Human Rights Violations
# American Journal of Political Science
# https://doi.org/10.7910/DVN/TGAL7M
# =============================================================================

# Start from an empty workspace: remove every object that ls() reports in the
# current environment.
# NOTE(review): loaded packages and options() are not reset by this call, and
# it also deletes the objects of anyone who source()s the script into a live
# session.
rm(list = ls())

# Install Packages
#install.packages("dplyr")
#install.packages("ggplot2")
#install.packages("forcats")
#install.packages("ggpubr")
#install.packages("ggeasy")
#install.packages("lemon")
#install.packages("sandwich")
#install.packages("survey")
#install.packages("lmtest")
#install.packages("remotes")
#remotes::install_version("cregg", version = "0.4.0")

# Required Packages
library("dplyr")
library("ggplot2")
library("forcats")
library("ggpubr")
library("sandwich")
library("survey")
library("lmtest")
library("cregg")
library("ggeasy")
library("lemon")

# Session-wide settings:
#   scipen = 999 -> strongly prefer fixed over scientific notation in output
#   warn = -1    -> suppress all warnings
# NOTE(review): warn = -1 stays in effect for the rest of the session and also
# hides warnings unrelated to plotting (e.g. coercion problems).
options(scipen = 999, warn = -1)

# -----------------------------------------------------------------------------
# Read in data
# -----------------------------------------------------------------------------

# Survey data, read from the working directory; text columns are kept as
# character vectors rather than factors.
hr_survey <- read.csv(
  file = "cordell_ingroupbiashumanrights_data.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# =============================================================================
# Table D.1: Summary of Group Identity Variables
# =============================================================================

# -----------------------------------------------------------------------------
# Calculate summary statistics for group identity variables
# -----------------------------------------------------------------------------

# Group identity variables for Table D.1: the names are the data columns, the
# values are the labels shown in the table.
match_vars <- setNames(
  c(
    "Perpetrator (Partisanship)",
    "Target (Race)",
    "Target (Religion)",
    "Target (Citizenship)",
    "Elite Cue (Partisanship)"
  ),
  c(
    "perp_match",
    "targ_race_match",
    "targ_relig_match",
    "targ_citiz_match",
    "elite_match"
  )
)

# Tabulate one group identity variable: number and percentage of observations
# per value, followed by a "Total" row.
#
# Args:
#   varname: name of the column of `data` to tabulate.
#   label:   display label; written to the `variable` column of the first row
#            only (the remaining rows get an empty string).
#   data:    data frame containing `varname`.
#   total_n: denominator used for the percentages.
#
# Returns:
#   A data frame with columns `variable`, `value`, `observations` and
#   `percentage`. Values containing "_in" / "_out" are shown as "In-group" /
#   "Out-group"; any other value keeps its own text, and missing values (which
#   get their own row when present) stay NA.
process_match_var <- function(varname, label, data, total_n) {
  counts <- as.data.frame(table(data[[varname]], useNA = "ifany"))
  names(counts) <- c("value", "observations")

  # table() hands the tabulated values back as a factor. Relabel from the
  # character form: passing the factor itself to ifelse() as the fallback would
  # insert its integer codes (an NA row would show up as e.g. "3").
  raw_value <- as.character(counts$value)
  counts$value <- ifelse(
    grepl("_in", raw_value), "In-group",
    ifelse(grepl("_out", raw_value), "Out-group", raw_value)
  )

  # The Total row sums the tabulated counts, missing values included
  counts <- rbind(
    counts,
    data.frame(value = "Total", observations = sum(counts$observations))
  )
  counts$percentage <- round(100 * counts$observations / total_n, 1)

  # Show the variable label once, on the first row of the block
  counts$variable <- ""
  counts$variable[1] <- label

  counts[, c("variable", "value", "observations", "percentage")]
}

# Denominator for the percentages: all rows of the survey data
total <- nrow(hr_survey)

# Summarise each group identity variable; the list is named by data column
match_tables <- lapply(
  setNames(nm = names(match_vars)),
  function(column) {
    process_match_var(
      column, match_vars[[column]],
      data = hr_survey, total_n = total
    )
  }
)

# Stack the per-variable summaries
tabd1 <- do.call(rbind, match_tables)

# -----------------------------------------------------------------------------
# Create table
# -----------------------------------------------------------------------------

# Write Table D.1 to a CSV file in the working directory
write.csv(tabd1, file = "tab_d1.csv", row.names = FALSE)

# =============================================================================
# Figure D.1: Effect of the Group Identity Attributes, Alternative Construction of the Group Identity Variables (NAs coded as 0)
# =============================================================================

# Alternative construction of the group identity variables: the "_na" copies
# recode missing matches as the out-group level (the originals are untouched).
hr_survey$perp_match_na <- replace(
  hr_survey$perp_match, is.na(hr_survey$perp_match), "perp_out"
)
hr_survey$targ_race_match_na <- replace(
  hr_survey$targ_race_match, is.na(hr_survey$targ_race_match), "race_out"
)
hr_survey$targ_relig_match_na <- replace(
  hr_survey$targ_relig_match, is.na(hr_survey$targ_relig_match), "relig_out"
)

# Citizenship is copied as is.
# NOTE(review): unlike the other variables no NA recoding is applied here;
# confirm this is intended.
hr_survey$targ_citiz_match_na <- hr_survey$targ_citiz_match

# Elite cue: a missing match becomes out-group only when the cue came from a
# Democrat or Republican member of Congress; other missing values stay NA.
hr_survey$elite_match_na <- replace(
  hr_survey$elite_match,
  is.na(hr_survey$elite_match) &
    hr_survey$elite %in% c("A Democrat member of Congress",
                           "A Republican member of Congress"),
  "elite_out"
)

# Convert regression variables into factors
factor_vars <- c(
  "perp_match_na", "agent", "type", "scope", "targ_nonstate",
  "targ_race_match_na", "targ_relig_match_na", "targ_citiz_match_na",
  "frame", "elite_match_na"
)
hr_survey <- hr_survey %>%
  mutate(across(all_of(factor_vars), as.factor))

# -----------------------------------------------------------------------------
# Calculate marginal means
# -----------------------------------------------------------------------------

# Attributes on the right-hand side of both models (identical for the two
# outcomes, so they are written once)
fig_d1_terms <- c(
  "perp_match_na", "agent", "type", "scope", "targ_nonstate",
  "targ_race_match_na", "targ_relig_match_na", "targ_citiz_match_na",
  "frame", "elite_match_na"
)

# Group identity dummy variables kept from each set of estimates
group_vars <- c(
  "perp_match_na", "targ_race_match_na",
  "targ_relig_match_na", "targ_citiz_match_na",
  "elite_match_na"
)

# Model 1: marginal means for outcome1, restricted to the group identity rows
fig_d1_m1 <- cregg::cj(
  hr_survey,
  reformulate(fig_d1_terms, response = "outcome1"),
  id = ~id,
  estimate = "mm"
)
fig_d1_m1 <- fig_d1_m1[is.element(fig_d1_m1$feature, group_vars), ]

# Model 2: marginal means for outcome2, restricted to the group identity rows
fig_d1_m2 <- cregg::cj(
  hr_survey,
  reformulate(fig_d1_terms, response = "outcome2"),
  id = ~id,
  estimate = "mm"
)
fig_d1_m2 <- fig_d1_m2[is.element(fig_d1_m2$feature, group_vars), ]

# -----------------------------------------------------------------------------
# Prepare figures
# -----------------------------------------------------------------------------

# Stack the estimates from both models
fig_d1_all <- rbind(fig_d1_m1, fig_d1_m2)

# Display labels for the group identity variables
variable_labels <- c(
  "perp_match_na" = "Perpetrator (partisanship)",
  "targ_race_match_na" = "Target (race)",
  "targ_relig_match_na" = "Target (religion)",
  "targ_citiz_match_na" = "Target (citizenship)",
  "elite_match_na" = "Elite cue (partisanship)"
)

# Order of the axis entries: each label followed by its in- and out-group level
level_order <- c(
  "Perpetrator (partisanship)", "perp_in", "perp_out",
  "Target (race)", "race_in", "race_out",
  "Target (religion)", "relig_in", "relig_out",
  "Target (citizenship)", "citiz_in", "citiz_out",
  "Elite cue (partisanship)", "elite_in", "elite_out"
)

# Append one placeholder row per label and outcome. Their estimates are NA;
# they only add the label entries to the `level` axis. The label is stored in
# a column called `variable`, as well as in `level`.
fig_d1_all <- dplyr::bind_rows(
  fig_d1_all,
  data.frame(
    outcome = rep(c("outcome1", "outcome2"), times = length(variable_labels)),
    variable = rep(unname(variable_labels), each = 2),
    level = rep(unname(variable_labels), each = 2),
    estimate = NA, lower = NA, upper = NA
  )
)

# Add the respondents variable (in-groups vs. out-groups) and fix the axis
# order. Placeholder rows are assigned "In-group"; any level matching none of
# the rules becomes NA once converted to a factor. `Respondents` is derived
# from the character `level` before `level` itself is turned into a (reversed)
# factor.
fig_d1_all <- fig_d1_all %>%
  mutate(
    Respondents = case_when(
      grepl("_in", level) ~ "In-group",
      grepl("_out", level) ~ "Out-group",
      level %in% variable_labels ~ "In-group",
      TRUE ~ level
    ) %>%
      factor(levels = c("In-group", "Out-group")),
    level = fct_rev(factor(level, levels = level_order))
  )

# Split back into one data set per outcome
fig_d1_m1 <- fig_d1_all %>% dplyr::filter(outcome == "outcome1")
fig_d1_m2 <- fig_d1_all %>% dplyr::filter(outcome == "outcome2")

# -----------------------------------------------------------------------------
# Create figures
# -----------------------------------------------------------------------------

# Build one panel of Figure D.1: point estimates with error bars for every
# entry of `level`, coloured by `Respondents`, drawn horizontally.
#
# Args:
#   panel_data:  data frame with columns `level`, `estimate`, `lower`, `upper`
#                and `Respondents`.
#   panel_title: title shown (centred) above the panel.
#   midpoint:    optional number. When supplied, a reference line is drawn at
#                this value and the estimate axis is made symmetric around it;
#                when NULL (default) neither is added.
#
# Returns:
#   A ggplot object.
make_fig_d1_panel <- function(panel_data, panel_title, midpoint = NULL) {
  # Axis labels are matched to the levels of `level` by position, so there
  # must be exactly 15 entries on the axis. With the level order used in this
  # script each triple is (out-group, in-group, placeholder row), which puts
  # the text on the in-group row.
  axis_labels <- c(
    "", "Elite cue (partisanship)", "",
    "", "Target (citizenship)", "",
    "", "Target (religion)", "",
    "", "Target (race)", "",
    "", "Perpetrator (partisanship)", ""
  )

  # NOTE(review): `size` on line geoms is deprecated in ggplot2 >= 3.4.0 in
  # favour of `linewidth`; kept so the script runs unchanged on older versions.
  panel <- ggplot(
    panel_data,
    aes(x = level, y = estimate, colour = Respondents)
  ) +
    geom_point() +
    geom_errorbar(aes(ymin = lower, ymax = upper), size = 0.3, width = 0.2) +
    scale_x_discrete(labels = axis_labels) +
    xlab("") +
    ylab("Marginal mean") +
    coord_flip()

  if (!is.null(midpoint)) {
    panel <- panel +
      geom_hline(yintercept = midpoint, size = 0.2, color = "black") +
      scale_y_symmetric(mid = midpoint)
  }

  # theme_classic() must come before the theme tweaks below, otherwise it
  # would overwrite them
  panel +
    theme_classic(base_size = 10) +
    scale_colour_grey() +
    easy_center_title() +
    ggtitle(panel_title) +
    theme(
      axis.text.x = element_text(colour = "black"),
      axis.text.y = element_text(colour = "black"),
      axis.title.x = element_text(margin = margin(t = 10)),
      axis.ticks.y = element_blank()
    ) +
    labs(color = NULL)
}

# Create panel (a) Disapproval forced-choice (reference line at 0.5)
fig_d1_m1_plot <- make_fig_d1_panel(
  fig_d1_m1, "(a) Disapproval forced-choice",
  midpoint = 0.5
)

# Create panel (b) Disapproval ratings-based
fig_d1_m2_plot <- make_fig_d1_panel(
  fig_d1_m2, "(b) Disapproval ratings-based"
)

# Write Figure D.1 to a PDF: the two panels side by side with a narrow empty
# spacer between them and one shared legend at the bottom.
pdf("figure_d1.pdf", width = 8.26, height = 5.82)
invisible(tryCatch(
  # Print explicitly: top-level values are not auto-printed when the script is
  # run through source(), which would otherwise leave the PDF without a plot.
  print(
    ggarrange(
      fig_d1_m1_plot, NULL, fig_d1_m2_plot,
      nrow = 1, ncol = 3, widths = c(1, 0.09, 1),
      common.legend = TRUE, legend = "bottom"
    )
  ),
  # Close the device even if drawing fails, so the file is not left open
  finally = dev.off()
))